library(raster)
## Loading required package: sp
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.5.2
## rgdal: version: 1.4-8, (SVN revision 845)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.4.2, released 2019/06/28
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/gdal
##  GDAL binary built with GEOS: FALSE 
##  Loaded PROJ.4 runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/proj
##  Linking to sp version: 1.3-2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
## 
##     intersect, select, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(ggplot2)
library(pophelper)
## pophelper v2.3.1 ready.
library(scatterpie)
## 
## Attaching package: 'scatterpie'
## The following object is masked from 'package:sp':
## 
##     recenter
library(ggnewscale)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

First plot CV

### parvi
parvi_CV<-read.delim("../data/genetic/output/admixture/parvi/Kerror_parviglumis.txt", 
                    sep=" ", header=FALSE, stringsAsFactors = FALSE) %>%
                    mutate(K = parse_number(V3)) %>% 
                    dplyr::select(K, V4) %>% rename(CV=V4)
## Plot CV
p <- ggplot(parvi_CV, aes(x=K, y=CV)) + geom_point(size=1) + geom_line()  
p + theme(axis.title.x = element_text(face="bold", size=20),
          axis.text.x  = element_text(angle=90, vjust=0.5, size=16),
          axis.title.y = element_text(face="bold", size=20),
          axis.text.y  = element_text(size=16)) +
  ggtitle(label = "Z. m. parviglumis")

The CV error of the Admixture analysis doesn't show a clear K value, so we chose those K values where the slope changes for each species. For ** Z. m. parviglumis** this is K=13 and K=25, and for Z. m. mexicana K= 5, K=10 and K=20.

Z. m. parviglumis

Load admixture Q data and samples metadata

# get data
parvifiles<-c("../data/genetic/output/admixture/parvi/bytaxa_parviglumis.25.Q",
              "../data/genetic/output/admixture/parvi/bytaxa_parviglumis.13.Q")
parvi_list<- readQ(parvifiles)
# Check
attributes(parvi_list)
## $names
## [1] "bytaxa_parviglumis.25.Q" "bytaxa_parviglumis.13.Q"
# Get metadata
meta<-read.delim("../data/genetic/output/ADN_pasap_3604.txt")
parviPlinksamples<-read.table("../data/genetic/output/bytaxa_parviglumis.fam")
parvimeta<-meta[meta$POBL %in% parviPlinksamples$V2, ]
levels(parvimeta$ESTADO)[13]<-"EdoMex"
levels(parvimeta$ESTADO)[14]<-"Michoacan"
# add sample metadata to qlist
attributes(parvi_list[[1]])$row.names<-as.character(parvimeta$POBL)
attributes(parvi_list[[2]])$row.names<-as.character(parvimeta$POBL)
# drop unused levels
parvimeta<-droplevels(parvimeta)

Plot Qvals of both Ks of interest:

p1 <- plotQ(alignK(parvi_list[1:2]),
            sortind = "all",
            imgoutput="join", returnplot=T, sharedindlab=FALSE,
            exportplot=F,basesize=11)
grid.arrange(p1$plot[[1]])

Plot only K=13 ordering by all genetic clusters

p1<-plotQ(parvi_list[2],
            returnplot=T,
            exportplot=F,
            sortind = "all", basesize = 11,
           # showindlab= TRUE, useindlab=TRUE, indlabangle=90, indlabsize=0.1,
      exportpath = "../data/genetic/output/admixture/parvi")
plot(p1$plot[[1]])

Scatterpies of admixture groups

# Read raw Qval file
Qval<-read.table(paste0("../data/genetic/output/admixture/parvi/bytaxa_parviglumis.13.Q"))
names(Qval)<-paste0("K", 1:ncol(Qval))
# add metadata
Qval<-cbind(parvimeta$POB, parvimeta$INDIV, parvimeta$LONGITUDE, parvimeta$LATITUDE, Qval)
names(Qval)[1:4]<-c("POB", "INDIV", "LONGITUDE", "LATITUDE") 
# generate same colors than the ones used for admixture plot
piecolors<-gplots::rich.colors(13)
# plot scatterpie
ppie1<- ggplot() + geom_scatterpie(data=Qval,
                           aes(x=LONGITUDE, 
                               y=LATITUDE,
                               group=POB),
                               color=NA,
                               cols=paste0("K", 1:13)) +
 scale_fill_manual(values= piecolors,
                    name="Genetic cluster") + theme_bw()
  
ppie1

Adding populations names:

# plot scatterpie
ppie1 + geom_text(data=parvimeta,
                      aes(x=parvimeta$LONGITUDE,
                          y=parvimeta$LATITUDE,
                          label=POB),
                      color="grey40",
                     check_overlap = T,
                      hjust = 0, vjust=1, nudge_x = 0.0005,
                 size= 5.5)
## Warning: Use of `parvimeta$LONGITUDE` is discouraged. Use `LONGITUDE` instead.
## Warning: Use of `parvimeta$LATITUDE` is discouraged. Use `LATITUDE` instead.

Plot ordering populations in geographical order

# Get longitudinal order
df<-data.frame(POB=parvimeta$POB, LONGITUDE=parvimeta$LONGITUDE) %>%
    unique() %>%
    arrange(LONGITUDE)
unique(df$POB)
##  [1] BVPU BGUA BQUE BEJU BMAN BSAU BTAR BTAC BNOC BRED BCAR BHUE BTZI BTIQ BTUZ
## [16] BJUA BTAL BPCH BZUL BOTZ BVBR BTEJ BTEL BZAC BAPX BIXC BTCT BMAZ MALI BCOL
## [31] BHUI BMOR BOLI BOAX
## 34 Levels: BAPX BCAR BCOL BEJU BGUA BHUE BHUI BIXC BJUA BMAN BMAZ BMOR ... MALI
# set levels of Pops in longitudinal order
desired_order<-unique(df$POB)
parvimeta$POBorder<-factor(parvimeta$POB, levels = desired_order)
# change levels so that they sort in that order alphabetically
levels(parvimeta$POBorder)<-paste0(1:34, levels(parvimeta$POBorder))
levels(parvimeta$POBorder)[1:9]<-paste0("0", levels(parvimeta$POBorder)[1:9])
# set labels
labset2<-data.frame(POBorder=as.character(parvimeta$POBorder), 
                    ESTADO=as.character(parvimeta$ESTADO), stringsAsFactors = FALSE)
#plot
p<-plotQ(parvi_list[2],
            returnplot=T,
            exportplot=F,
            sortind = "all",
      grplab=labset2, 
      ordergrp=TRUE, 
      grplabangle= 45, grplabsize=10, linesize=2, divsize = 2, grplabheight= 2, grplabpos=0.5,
      exportpath = "../data/genetic/output/admixture/parvi")
plot(p$plot[[1]])

Extract in which PGD do the sampled individuals fall. This would be used to then add this information to the plot and see how well PGD correspond to genetic clusters.

First load raster data of Zea mays spp parviglumis SDM and the PGD that fell within it:

# path to rasters
my_rasters<-c("../data/spatial/modelosDarwinZea/Zea_mays_parviglumis.tif", # SDM 
              "../data/spatial/areasProxyDivGen/crop_to_sp_final/PDG_Zea_mays_parviglumis.tif") #SDM subdivied by Proxies of gen div
# stack rasters and add nicer name
parvi_rasters<-stack(my_rasters)
# check
names(parvi_rasters)
## [1] "Zea_mays_parviglumis"     "PDG_Zea_mays_parviglumis"
# Check projection
proj4string(parvi_rasters$PDG_Zea_mays_parviglumis)
## [1] "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"
# change projection to longlat for nicer visualization
newcrs <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
parvi_rasters<-projectRaster(parvi_rasters, crs=newcrs, method="ngb")
parvi_rasters<-stack(parvi_rasters)

Z. m. parviglumis has 29 PGD distributed like this:

# Get number of cells of each PDG
table(getValues(parvi_rasters$PDG_Zea_mays_parviglumis))
## 
##       0       2       5       6       7       8      10      11      15      20 
## 2242616     395    8786    6753       3    9641     425    2954    5340    1025 
##      21      23      25      32      36      37      38      39      40      41 
##    1070      92    2237     359    7557   31658    1731     393    4462     201 
##      42      43      48      58      84      86      90      92      93 
##       4    8556    5913      34    1290       6     501     239      50

Plot PGD and sampling points of genetic data.

# crop raster to make it more managable
parvi_small<- raster::crop(parvi_rasters$PDG_Zea_mays_parviglumis,
                           y=extent(c(-107, -96 , 15.5, 22)))
# convert to df to plot with ggplot
raster_df<-as.data.frame(rasterToPoints(parvi_small, spatial = TRUE))
raster_df$PDG_Zea_mays_parviglumis<-as.character(raster_df$PDG_Zea_mays_parviglumis)
# get nice colors
mycols<-c("grey40","#917031", "#4d6cd0", "#8bbb39", "#c556c5", "#53d06a", "#895dc8", "#42a331", "#d84492", "#69b974", "#d63c48", "#4dc0b1", "#d3542b", "#4b9cd3", "#d2a337", "#9289cf", "#b1b23a", "#9a5398", "#498331", "#cc4668", "#37835d", "#df89be", "#6e7721", "#9b496e", "#b9b06d", "#ac575b", "#65743a", "#e78b7a", "#db873d", "#a2552d")
# get labels with 0 as "NA" (it is not really NA, just the part of Mx out of the species range). True NA is outside of Mx.
mylabs<-unique(getValues(parvi_rasters$PDG_Zea_mays_parviglumis))[2:length(unique(getValues(parvi_rasters$PDG_Zea_mays_parviglumis)))] # ommit the first (true NA)
mylabs[1]<-"NA" # change 0 to "NA"
mylabs
##  [1] "NA" "5"  "32" "42" "10" "86" "90" "36" "20" "48" "92" "25" "2"  "11" "37"
## [16] "6"  "21" "43" "84" "93" "8"  "38" "15" "23" "40" "58" "41" "39" "7"
# plot PGD raster
p<- ggplot() +
    geom_raster(data = raster_df , aes(x = x, y = y, fill = PDG_Zea_mays_parviglumis)) +
    scale_fill_manual(values= alpha(mycols, 0.5) , name="PGD",
                      labels=mylabs) + # no prevent the area outside the SDM to appear as a PDG 0
    theme_bw() + 
    labs(x="", y= "") + theme(text = element_text(size = 25))
# add sampling points
p + geom_point(data=parvimeta, aes(x=LONGITUDE, 
                                   y=LATITUDE))
## Warning: Raster pixels are placed at uneven horizontal intervals and will be
## shifted. Consider using geom_tile() instead.

Plot scatterpies with genetic groups

p_piemap<-p + new_scale("fill") + # this is needed because raster and pie have diff fills
          geom_scatterpie(data=Qval,
                                   aes(x=LONGITUDE, 
                                       y=LATITUDE,
                                       group=POB),
                                       color="NA",
                                       cols=paste0("K", 1:13)) +
         scale_fill_manual(values= piecolors,
                            name="Genetic cluster") +
          labs(x="", y= "") + theme(text = element_text(size = 25))
p_piemap
## Warning: Raster pixels are placed at uneven horizontal intervals and will be
## shifted. Consider using geom_tile() instead.

Extract in which PGD each genetic sample falls:

# Create function to estimate the mode, omitting na values
Mode <- function(x) {
  ux <- unique(na.omit(x))
  ux[which.max(tabulate(match(x, ux)))]
}
# transform 0 value (out of sp SDM range) to NA so that only PGD are considered
parvi_rasters$PDG_Zea_mays_parviglumis<-reclassify(parvi_rasters$PDG_Zea_mays_parviglumis, 
                                                   rcl=c(0, .1, NA), include.lowest=TRUE)
# extract the PDG most frequent in a given buffer radio of the sampling point
# ignoring NA values (see Mode fun)
PGD_points<-raster::extract(parvi_rasters$PDG_Zea_mays_parviglumis,
                            buffer=2500, fun=Mode, # so that no points fell in only NAs
                    y=data.frame(long=parvimeta$LONGITUDE,
                                 lat=parvimeta$LATITUDE)) %>%
            as.character()
PGD_points<-parvimeta %>% dplyr::select(LONGITUDE, LATITUDE, DNASample, POB, POBorder, POBL) %>%
                  mutate(PGD_points=PGD_points) 
table(PGD_points$PGD_points)
## 
##  10  11  15  36  37  40  41  43  48   5   6   8  84 
##  37  54  12 130 707  68  42 276  33 205 112  62   5
# check if there are samples that did not fell in any PGD
sum(is.na(PGD_points$PGD_points)) 
## [1] 26
PGD_points[is.na(PGD_points$PGD_points), ]
##      LONGITUDE LATITUDE DNASample  POB POBorder        POBL PGD_points
## 1078 -99.28528 16.98056    186_10 BTCT   27BTCT BTCT_186_10       <NA>
## 1079 -99.28528 16.98056    186_11 BTCT   27BTCT BTCT_186_11       <NA>
## 1080 -99.28528 16.98056    186_12 BTCT   27BTCT BTCT_186_12       <NA>
## 1081 -99.28528 16.98056    186_13 BTCT   27BTCT BTCT_186_13       <NA>
## 1082 -99.28528 16.98056    186_14 BTCT   27BTCT BTCT_186_14       <NA>
## 1083 -99.28528 16.98056    186_16 BTCT   27BTCT BTCT_186_16       <NA>
## 1084 -99.28528 16.98056    186_17 BTCT   27BTCT BTCT_186_17       <NA>
## 1085 -99.28528 16.98056    186_18 BTCT   27BTCT BTCT_186_18       <NA>
## 1086 -99.28528 16.98056    186_19 BTCT   27BTCT BTCT_186_19       <NA>
## 1087 -99.28528 16.98056     186_1 BTCT   27BTCT  BTCT_186_1       <NA>
## 1088 -99.28528 16.98056    186_20 BTCT   27BTCT BTCT_186_20       <NA>
## 1089 -99.28528 16.98056    186_21 BTCT   27BTCT BTCT_186_21       <NA>
## 1090 -99.28528 16.98056    186_22 BTCT   27BTCT BTCT_186_22       <NA>
## 1091 -99.28528 16.98056    186_23 BTCT   27BTCT BTCT_186_23       <NA>
## 1092 -99.28528 16.98056    186_24 BTCT   27BTCT BTCT_186_24       <NA>
## 1093 -99.28528 16.98056    186_25 BTCT   27BTCT BTCT_186_25       <NA>
## 1094 -99.28528 16.98056    186_26 BTCT   27BTCT BTCT_186_26       <NA>
## 1095 -99.28528 16.98056    186_27 BTCT   27BTCT BTCT_186_27       <NA>
## 1096 -99.28528 16.98056    186_28 BTCT   27BTCT BTCT_186_28       <NA>
## 1097 -99.28528 16.98056    186_29 BTCT   27BTCT BTCT_186_29       <NA>
## 1098 -99.28528 16.98056     186_2 BTCT   27BTCT  BTCT_186_2       <NA>
## 1099 -99.28528 16.98056    186_30 BTCT   27BTCT BTCT_186_30       <NA>
## 1100 -99.28528 16.98056     186_4 BTCT   27BTCT  BTCT_186_4       <NA>
## 1101 -99.28528 16.98056     186_5 BTCT   27BTCT  BTCT_186_5       <NA>
## 1102 -99.28528 16.98056     186_7 BTCT   27BTCT  BTCT_186_7       <NA>
## 1103 -99.28528 16.98056     186_9 BTCT   27BTCT  BTCT_186_9       <NA>

Remove genetic samples that did not fell in any PGD

# check original n
nrow(PGD_points)
## [1] 1769
# get samples names that FELL in PGD
PGD_points<-PGD_points[!is.na(PGD_points$PGD_points), ]
fell_in_PGD<-as.character(PGD_points$POBL)
# check how many samples we have now
length(fell_in_PGD)
## [1] 1743
# keep only samples that fell in PGD in the admixture data
# get rows matching fell in PGD
x<-attributes(parvi_list[[2]])$row.names %in% fell_in_PGD
# subset list
subset_admix_list<-list(parvi_list[[2]][x,])
names(subset_admix_list)<-names(parvi_list[2])

Use this information to see in which genetic group our PGD fall:

## order PGD
# set levels of PGD more or less in lontigitudinal order
desired_order<-c("48", "36", "10", "5", "15", "11" , "37", "6", "84", "43", "8", "40", "41")
PGD_points$PGD_order<-factor(PGD_points$PGD_points, levels = desired_order)
# change levels so that they sort in that order alphabetically
levels(PGD_points$PGD_order)<-paste0(1:13, "_",levels(PGD_points$PGD_order))
levels(PGD_points$PGD_order)[1:9]<-paste0("0", levels(PGD_points$PGD_order)[1:9])
# set labels
labsetPGD<-data.frame(PGD_points=as.character(PGD_points$PGD_order),
                      POBorder=as.character(PGD_points$POBorder),
                      stringsAsFactors = FALSE)
p3<-plotQ(subset_admix_list[1],
            returnplot=T,
            exportplot=F,
            sortind = "all",
      grplab=labsetPGD, selgrp = "PGD_points",
      ordergrp=T, 
      grplabangle= 90, grplabsize=8, linesize=3, divsize = 3.5,
      exportpath = getwd(), height = 20, width = 80)
plot(p3$plot[[1]])

Omit labels for plot for the paper and save as png:

p3<-plotQ(subset_admix_list[1], 
            returnplot=T,
            exportplot=T,
            sortind = "all",
      grplab=labsetPGD, selgrp = "PGD_points",
      ordergrp=T, 
      grplabangle= 90, grplabsize=8, divsize = 1,
      showgrplab= FALSE, splab="",
      exportpath = getwd(), height = 2.5, width = 15,
      outputfilename="../figures/parviglumis13Q_subsetPGD")
## Drawing plot ...
## /Volumes/datos/DarwinUICN/Zonificacion/Zonas_de_vida/analisisUniCons_proxiGen/bin/../figures/parviglumis13Q_subsetPGD.png exported.

Save map for paper plot:

p_piemap<-p + theme(legend.position = "none") +
          new_scale("fill") + # this is needed because raster and pie have diff fills
          geom_scatterpie(data=Qval,
                                   aes(x=LONGITUDE, 
                                       y=LATITUDE,
                                       group=POB),
                                       color="NA",
                                       cols=paste0("K", 1:13)) +
         scale_fill_manual(values= piecolors)+
         labs(x="", y= "") + theme(text = element_text(size = 15))
# save to file
ggsave("../figures/piemap_parviglumis_pgd.png",
       plot=p_piemap, dpi=72,
       width=15, height = 10, units="cm")
## Warning: Raster pixels are placed at uneven horizontal intervals and will be
## shifted. Consider using geom_tile() instead.

Get session info:

sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gridExtra_2.3    ggnewscale_0.4.5 scatterpie_0.1.5 pophelper_2.3.1 
##  [5] ggplot2_3.3.3    readr_1.4.0      dplyr_1.0.2      rgdal_1.4-8     
##  [9] raster_3.4-5     sp_1.4-4        
## 
## loaded via a namespace (and not attached):
##  [1] gtools_3.8.2        tidyselect_1.1.0    xfun_0.19          
##  [4] lpSolve_5.6.15      purrr_0.3.4         lattice_0.20-41    
##  [7] colorspace_2.0-0    vctrs_0.3.5         generics_0.1.0     
## [10] htmltools_0.5.0     label.switching_1.8 yaml_2.2.1         
## [13] rlang_0.4.9         pillar_1.4.7        glue_1.4.2         
## [16] withr_2.3.0         tweenr_1.0.1        rvcheck_0.1.8      
## [19] lifecycle_0.2.0     stringr_1.4.0       munsell_0.5.0      
## [22] combinat_0.0-8      gtable_0.3.0        caTools_1.17.1.2   
## [25] codetools_0.2-18    evaluate_0.14       labeling_0.4.2     
## [28] knitr_1.30          Rcpp_1.0.5          KernSmooth_2.23-18 
## [31] scales_1.1.1        BiocManager_1.30.10 farver_2.0.3       
## [34] gplots_3.1.0        ggforce_0.3.2       hms_0.5.3          
## [37] digest_0.6.27       stringi_1.5.3       polyclip_1.10-0    
## [40] grid_3.5.1          tools_3.5.1         bitops_1.0-6       
## [43] magrittr_2.0.1      tibble_3.0.4        crayon_1.3.4       
## [46] tidyr_1.0.2         pkgconfig_2.0.3     ellipsis_0.3.1     
## [49] MASS_7.3-53         rmarkdown_2.5       R6_2.5.0           
## [52] compiler_3.5.1